notebook.community

Edit and run



In [1]:

    
import scipy as sp
import matplotlib.pylab as plt
import seaborn as sea
import pandas as pd

%pylab inline









    



/opt/conda/lib/python3.5/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.
  warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')
/opt/conda/lib/python3.5/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.
  warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')






    



Populating the interactive namespace from numpy and matplotlib






    



WARNING: pylab import has clobbered these variables: ['plt']
`%matplotlib` prevents importing * from pylab and numpy



In [2]:

    
# Materialize everything read_games() yields into a list: later cells
# iterate over `recs` more than once.
from example import read_games
recs = list(read_games())



In [3]:

    
from collections import defaultdict

# Tally, for every top-level key found under rec['data'], how many records
# carry that key; then list the distinct keys in sorted order.
topkeys = defaultdict(int)
for rec in recs:
    for k in rec['data']:
        topkeys[k] = topkeys[k] + 1
print(sorted(topkeys))









    



['about_the_game', 'achievements', 'background', 'categories', 'controller_support', 'demos', 'detailed_description', 'developers', 'dlc', 'drm_notice', 'ext_user_account_notice', 'genres', 'header_image', 'is_free', 'legal_notice', 'linux_requirements', 'mac_requirements', 'metacritic', 'movies', 'name', 'package_groups', 'packages', 'pc_requirements', 'platforms', 'price_overview', 'publishers', 'recommendations', 'release_date', 'required_age', 'reviews', 'screenshots', 'short_description', 'steam_appid', 'support_info', 'supported_languages', 'type', 'website']



In [4]:

    
# Per-field summary of the raw records. For each top-level key, print:
#   key | records containing the key (from `topkeys`) | distinct non-empty
#   values | smallest value | largest value   (both truncated to 20 chars)
for ky, tot in sorted(topkeys.items()):
    uniq = set()

    for rec in recs:
        val = rec['data'].get(ky, None)
        if not val:
            # Falsy values (None, '', 0, False, empty containers) are ignored.
            continue
        # Containers are collapsed to a placeholder; everything else is
        # compared by its stripped string form.
        if isinstance(val, dict):
            val = "<DICT>"
        elif isinstance(val, list):
            val = "<LIST>"
        else:
            val = str(val).strip()
        uniq.add(val)

    # All collected values are strings, so min/max are lexicographic
    # (e.g. '10' sorts before '99910'). None when no record had a value.
    mn = min(uniq) if uniq else None
    mx = max(uniq) if uniq else None
    print("%-25s %8d %8d %20s %20s" % (ky, tot, len(uniq), str(mn)[:20], str(mx)[:20]))









    



about_the_game               12624    11820 !nsane is a nod to t 日本語バージョンダウンロード開始されまし
achievements                  6793        1               <DICT>               <DICT>
background                   12624    11774 http://cdn.akamai.st http://cdn.akamai.st
categories                   12388        1               <LIST>               <LIST>
controller_support            3517        1                 full                 full
demos                         1162        1               <LIST>               <LIST>
detailed_description         12624    11821 !nsane is a nod to t 日本語バージョンダウンロード開始されまし
developers                   11989        1               <LIST>               <LIST>
dlc                           2111        1               <LIST>               <LIST>
drm_notice                      82       43 Act Control<br>5 mac                valve
ext_user_account_notice        144       89 1C Game Studios Acco       www.doak.co.za
genres                       11881        1               <LIST>               <LIST>
header_image                 12624    12442 http://cdn.akamai.st http://cdn.akamai.st
is_free                      12624        1                 True                 True
legal_notice                  5319     4519 "Age of Steel: Recha           ⓒ REMIMORY
linux_requirements           12624        1               <DICT>               <DICT>
mac_requirements             12624        1               <DICT>               <DICT>
metacritic                    2267        1               <DICT>               <DICT>
movies                       11012        1               <LIST>               <LIST>
name                         12624    12468 ! That Bastard Is Tr              ｈａｃｋｍｕｄ
package_groups               12624        1               <LIST>               <LIST>
packages                     10488        1               <LIST>               <LIST>
pc_requirements              12624        1               <DICT>               <DICT>
platforms                    12624        1               <DICT>               <DICT>
price_overview               10302        1               <DICT>               <DICT>
publishers                   12624        1               <LIST>               <LIST>
recommendations               4863        1               <DICT>               <DICT>
release_date                 12624        1               <DICT>               <DICT>
required_age                 12624       12                    0                    7
reviews                       3235     3205 "...strategy blender “단순, 명쾌, 통쾌함 속 예술.”
screenshots                  11923        1               <LIST>               <LIST>
short_description            12624    10685 !nsane is a nod to t 日本語バージョンダウンロード開始されまし
steam_appid                  12624    12475                   10                99910
support_info                 12624        1               <DICT>               <DICT>
supported_languages          12590     2296 Arabic<strong>*</str Turkish, English, Si
type                         12624        1                 game                 game
website                      12624     8324          Edmundm.com     www.xenomorph.me



In [5]:

    
from random import choice
from collections import Counter
from pprint import PrettyPrinter
# Pretty-printer with a 120-column width; `pprint` is bound to its method so
# the rest of the notebook can call pprint(obj) directly.
pp = PrettyPrinter(width=120)
pprint = pp.pprint

def fields(name):
    """Yield each truthy ``rec['data'][name]`` found in ``recs``, in record order.

    Records where the key is missing, or where its value is falsy, are skipped.
    """
    for record in recs:
        value = record['data'].get(name)
        if not value:
            continue
        yield value
            
field_values = list(fields('support_info'))


def out(v):
    """Pretty-print ``v`` and follow it with an empty line."""
    pprint(v)
    print("")


# Show the first and the last value, then ten values drawn at random
# (with replacement, unseeded).
for sample in (field_values[0], field_values[-1]):
    out(sample)
for _ in range(10):
    out(choice(field_values))


# def scan_values():
#     for fv in field_values:
#         free, purchase, subscript = False, False, False
#         for pg in fv:
#             if pg.get('is_recurring_subscription', '') == 'true':
#                 subscript = True
#             for s in pg.get('subs', list()):
#                 if s.get('is_free_license', None):
#                     free = True
#                 elif s.get('price_in_cents_with_discount', 0) > 0:
#                     purchase = True
#         yield (free, purchase, subscript)
# pprint(Counter(list(scan_values())))
           
# pprint(Counter([k for fv in field_values for k in fv.keys()]))

# pprint(Counter([i.get('description', 'MISSING').lower() for fv in field_values for i in fv]))









    



{'email': '', 'url': 'http://steamcommunity.com/app/10'}

{'email': 'jooomly@gmail.com', 'url': ''}

{'email': '', 'url': 'http://www.magix-audio.com/support/'}

{'email': '', 'url': ''}

{'email': '', 'url': ''}

{'email': 'thatsmytrunks@gmail.com', 'url': ''}

{'email': 'support@fallentreegames.com', 'url': 'http://www.fallentreegames.com/support'}

{'email': 'support@soedesco.com', 'url': ''}

{'email': 'xdgonx@yandex.ru', 'url': ''}

{'email': 'support@rondomedia.de', 'url': 'http://www.rondomedia.de/support.html'}

{'email': 'support@jetdogs.com', 'url': 'http://jetdogs.com'}

{'email': '', 'url': ''}

Genres

Like categories, it's a list of dicts. Use the dict['description'] for text and discretize:

If description is empty string, then ignore it.

All descriptions not described below are GenreIsOther

These descriptions become GenreIsXXX:

indie
action
adventure
casual
strategy
rpg
simulation
early access
free to play
sports
racing
massively multiplayer

These descriptions become GenreIsNonGame:

utilities
design & illustration
animation & modeling
software training
education
audio production
video production
web publishing
photo editing
accounting

Requirements

Applied to linux_requirements, mac_requirements, and pc_requirements

Value is dictionary with possible keys:

minimum
recommended

Turn each into a boolean, where val is the requirements dict of the corresponding platform, i.e.

LinuxReqsHaveMin = True if val.get('minimum', '') else False
LinuxReqsHaveRec = True if val.get('recommended', '') else False
MacReqsHaveMin = True if val.get('minimum', '') else False
MacReqsHaveRec = True if val.get('recommended', '') else False
PCReqsHaveMin = True if val.get('minimum', '') else False
PCReqsHaveRec = True if val.get('recommended', '') else False

Package Groups

3 boolean columns: FreeVerAvail, PurchaseAvail, SubscriptionAvail

Code for all:

def package_groups(rec):
    """Derive the three package-group availability flags for one record.

    Reads ``rec['data']['package_groups']`` (a list of dicts) and returns the
    tuple ``(free, purchase, subscript)``, i.e. the FreeVerAvail,
    PurchaseAvail and SubscriptionAvail columns:

    * free      -- some sub has a truthy ``is_free_license``
    * purchase  -- some sub without a free license has
                   ``price_in_cents_with_discount`` > 0
    * subscript -- some group has ``is_recurring_subscription == 'true'``

    A missing or empty ``data``, ``package_groups`` or ``subs`` entry is
    treated as empty, so such records yield ``(False, False, False)``.
    """
    # The `or` fallbacks cover both absent keys and keys present with None.
    # (The previous default was the `list` type itself, which is not iterable.)
    pgs = (rec.get("data") or {}).get("package_groups") or []
    free, purchase, subscript = False, False, False
    for pg in pgs:
        # The flag is compared against the string 'true', not a boolean.
        if pg.get('is_recurring_subscription', '') == 'true':
            subscript = True
        for s in pg.get('subs') or []:
            if s.get('is_free_license', None):
                free = True
            elif (s.get('price_in_cents_with_discount') or 0) > 0:
                purchase = True
    return (free, purchase, subscript)



In [6]:

    
# Attempt to read our shiny CSV file
# (the path is relative, so it is resolved against the current working directory)
gf = pd.read_csv('games-features.csv')
# Print the column index, then display the first rows as the cell's output.
print(gf.columns)
gf.head()









    



Index(['QueryID', 'ResponseID', 'QueryName', 'ResponseName', 'ReleaseDate',
       'RequiredAge', 'DemoCount', 'DeveloperCount', 'DLCCount', 'Metacritic',
       'MovieCount', 'PackageCount', 'RecommendationCount', 'PublisherCount',
       'ScreenshotCount', 'AchievementCount', 'AchievementHighlightedCount',
       'ControllerSupport', 'IsFree', 'FreeVerAvail', 'PurchaseAvail',
       'SubscriptionAvail', 'PlatformWindows', 'PlatformLinux', 'PlatformMac',
       'PCReqsHaveMin', 'PCReqsHaveRec', 'LinuxReqsHaveMin',
       'LinuxReqsHaveRec', 'MacReqsHaveMin', 'MacReqsHaveRec',
       'CategorySinglePlayer', 'CategoryMultiplayer', 'CategoryCoop',
       'CategoryMMO', 'CategoryInAppPurchase', 'CategoryIncludeSrcSDK',
       'CategoryIncludeLevelEditor', 'CategoryVRSupport', 'GenreIsNonGame',
       'GenreIsIndie', 'GenreIsAction', 'GenreIsAdventure', 'GenreIsCasual',
       'GenreIsStrategy', 'GenreIsRPG', 'GenreIsSimulation',
       'GenreIsEarlyAccess', 'GenreIsFreeToPlay', 'GenreIsSports',
       'GenreIsRacing', 'GenreIsMassivelyMultiplayer', 'PriceCurrency',
       'PriceInitial', 'PriceFinal', 'SupportEmail', 'SupportURL', 'AboutText',
       'Background', 'ShortDescrip', 'DetailedDescrip', 'DRMNotice',
       'ExtUserAcctNotice', 'HeaderImage', 'LegalNotice', 'Reviews',
       'SupportedLanguages', 'Website', 'PCMinReqsText', 'PCRecReqsText',
       'LinuxMinReqsText', 'LinuxRecReqsText', 'MacMinReqsText',
       'MacRecReqsText'],
      dtype='object')






    Out[6]:






  
    
      
      QueryID
      ResponseID
      QueryName
      ResponseName
      ReleaseDate
      RequiredAge
      DemoCount
      DeveloperCount
      DLCCount
      Metacritic
      ...
      LegalNotice
      Reviews
      SupportedLanguages
      Website
      PCMinReqsText
      PCRecReqsText
      LinuxMinReqsText
      LinuxRecReqsText
      MacMinReqsText
      MacRecReqsText
    
  
  
    
      0
      10
      10
      Counter-Strike
      Counter-Strike
      Nov 1 2000
      0
      0
      1
      0
      88
      ...
      
      
      English French German Italian Spanish Simplifi...
      None
      Minimum: 500 mhz processor 96mb ram 16mb video...
      
      Minimum: Linux Ubuntu 12.04 Dual-core from Int...
      
      Minimum: OS X  Snow Leopard 10.6.3 1GB RAM 4GB...
      
    
    
      1
      20
      20
      Team Fortress Classic
      Team Fortress Classic
      Apr 1 1999
      0
      0
      1
      0
      0
      ...
      
      
      English French German Italian Spanish
      None
      Minimum: 500 mhz processor 96mb ram 16mb video...
      
      Minimum: Linux Ubuntu 12.04 Dual-core from Int...
      
      Minimum: OS X  Snow Leopard 10.6.3 1GB RAM 4GB...
      
    
    
      2
      30
      30
      Day of Defeat
      Day of Defeat
      May 1 2003
      0
      0
      1
      0
      79
      ...
      
      
      English French German Italian Spanish
      http://www.dayofdefeat.com/
      Minimum: 500 mhz processor 96mb ram 16mb video...
      
      Minimum: Linux Ubuntu 12.04 Dual-core from Int...
      
      Minimum: OS X  Snow Leopard 10.6.3 1GB RAM 4GB...
      
    
    
      3
      40
      40
      Deathmatch Classic
      Deathmatch Classic
      Jun 1 2001
      0
      0
      1
      0
      0
      ...
      
      
      English French German Italian Spanish
      None
      Minimum: 500 mhz processor 96mb ram 16mb video...
      
      Minimum: Linux Ubuntu 12.04 Dual-core from Int...
      
      Minimum: OS X  Snow Leopard 10.6.3 1GB RAM 4GB...
      
    
    
      4
      50
      50
      Half-Life: Opposing Force
      Half-Life: Opposing Force
      Nov 1 1999
      0
      0
      1
      0
      0
      ...
      
      
      English French German Korean
      None
      Minimum: 500 mhz processor 96mb ram 16mb video...
      
      Minimum: Linux Ubuntu 12.04 Dual-core from Int...
      
      Minimum: OS X  Snow Leopard 10.6.3 1GB RAM 4GB...
      
    
  

5 rows × 74 columns



In [7]:

    
def invest(cname):
    """Summarize and plot the strictly positive values of ``gf[cname]``.

    Prints a header line, the ``describe()`` statistics of the values greater
    than zero, draws their distribution on a new 8x6 figure, and finishes
    with an empty line.
    """
    print(cname, " > 0")
    is_positive = gf[cname] > 0
    positive = gf.loc[is_positive, cname]
    print(positive.describe())
    plt.figure(figsize=(8, 6))
    sea.distplot(positive)
    print("")
# Run the same inspection on each count column of interest.
for column in ("DemoCount", "DLCCount", "RecommendationCount"):
    invest(column)









    



DemoCount  > 0
count    1162.000000
mean        1.001721
std         0.041469
min         1.000000
25%         1.000000
50%         1.000000
75%         1.000000
max         2.000000
Name: DemoCount, dtype: float64

DLCCount  > 0
count    2111.000000
mean        5.052108
std        24.812933
min         1.000000
25%         1.000000
50%         1.000000
75%         3.000000
max       630.000000
Name: DLCCount, dtype: float64

RecommendationCount  > 0
count       4863.000000
mean        3232.043389
std        25019.718648
min          101.000000
25%          201.000000
50%          441.000000
75%         1405.500000
max      1427633.000000
Name: RecommendationCount, dtype: float64



In [9]:

    
# Display the PCMinReqsText column of the feature table as the cell's output.
gf["PCMinReqsText"]









    Out[9]:





0        Minimum: 500 mhz processor 96mb ram 16mb video...
1        Minimum: 500 mhz processor 96mb ram 16mb video...
2        Minimum: 500 mhz processor 96mb ram 16mb video...
3        Minimum: 500 mhz processor 96mb ram 16mb video...
4        Minimum: 500 mhz processor 96mb ram 16mb video...
5        Minimum: 500 mhz processor 96mb ram 16mb video...
6        Minimum: 500 mhz processor 96mb ram 16mb video...
7        Minimum: 500 mhz processor 96mb ram 16mb video...
8        Minimum: 500 mhz processor 96mb ram 16mb video...
9        Minimum: 500 mhz processor 96mb ram 16mb video...
10                                                        
11       Minimum: 1.7 GHz Processor 512MB RAM DirectX® ...
12       Minimum: 1.2 GHz Processor 256MB RAM DirectX 7...
13       Minimum: 1.7 GHz Processor 512MB RAM DirectX® ...
14       Minimum: 1.2 GHz Processor 256MB RAM DirectX 7...
15       Minimum: Pentium 4 2.4GHz or AMD 2800+ Process...
16       Minimum:OS: Windows(r) 7 (32/64-bit)/Vista/XPP...
17       Minimum: 1.7 GHz Processor 512MB RAM DirectX® ...
18       Minimum: 1.7 GHz Processor 512MB RAM DirectX® ...
19       Minimum: 1.7 GHz Processor 512MB RAM DirectX® ...
20       Minimum:OS: Windows(r) 7 (32/64-bit)/Vista/XPP...
21       Minimum:                    Supported OS: Wind...
22       Minimum:OS: Windows(r) 7 32/64-bit / Vista 32/...
23       Minimum:OS: Windows 7 or newerProcessor: Dual ...
24       Minimum:OS: Windows 7 / Vista / XPProcessor: 3...
25       Minimum:OS: Windows(r) 7 / Vista / Vista64 / X...
26       Minimum:OS: Windows 7 / Vista / XPProcessor: 3...
27       Minimum:OS: Windows(r) 7/Vista/XPProcessor: In...
28       Minimum: Pentium3 800 MHz (or compatible) 256 ...
29       Minimum:OS: Windows XPProcessor: 1.2 GHZ or Eq...
                               ...                        
12594    Minimum:OS: Windows 7 or higerProcessor: 1.4GH...
12595    Minimum:OS: Windows 7Processor: 1.5 GhzDirectX...
12596    Minimum:OS: Windows 7Processor: Intel Core I5-...
12597    Minimum:OS: Windows 7 SP1 Windows 8.1 or Windo...
12598    Minimum:OS: Windows 7 (32-bit)Processor: 2.66 ...
12599    Minimum:OS: Microsoft(r) Windows(r) Vista / 7 ...
12600    Minimum:OS: Windows 8Processor: 1 Ghz CPUMemor...
12601    Minimum:OS: Windows XP / Vista / Win 7 / Win 8...
12602    Minimum:OS: Windows XPProcessor: 1 GHzMemory: ...
12603    Minimum:OS: Windows XP SP 3Processor: 1 GHzMem...
12604    Minimum:OS: Windows XPProcessor: Any Dual-Core...
12605    Minimum:OS: Windows XP/Windows Vista/Windows 7...
12606    Minimum:OS: Windows(r) 7 / Windows(r) 8 / Wind...
12607    Minimum:OS: Vista SP1 Win7Processor: 2.4 GHz P...
12608                               Minimum:OS: Windows XP
12609    Minimum:OS: Windows 7 64 BitProcessor: Core i3...
12610    Minimum:OS: Windows 7 or newerProcessor: Intel...
12611    Minimum:OS: Windows 7Processor: Intel(r) i5-45...
12612    Minimum:OS: Windows 7 64-bit or laterProcessor...
12613    Minimum:OS: Windows 7Processor: 2.0 GhzMemory:...
12614    Minimum:OS: Windows 7Processor: Dual Core CPU ...
12615                               Minimum:OS: Windows XP
12616    Minimum:OS: Microsoft(r) Windows Server 2008 W...
12617    Minimum:OS: Windows 7Processor: Intel Core 2 Q...
12618    Minimum:OS: Windows 7Processor: Intel core i5 ...
12619    Minimum:OS: Windows 10Processor: Intel i5-4590...
12620    Minimum:OS: Windows XPProcessor: Intel Celeron...
12621    Minimum:OS: Windows XPProcessor: 600 MhzMemory...
12622    Minimum:OS: Windows XPProcessor: 600 MhzMemory...
12623    Minimum:OS: Windows XP 7 8 10Processor: Dual C...
Name: PCMinReqsText, dtype: object

	QueryID	ResponseID	QueryName	ResponseName	ReleaseDate	DeveloperCount	Metacritic	...	SupportedLanguages	Website	PCMinReqsText	LinuxMinReqsText	MacMinReqsText
0	10	10	Counter-Strike	Counter-Strike	Nov 1 2000	1	88	...	English French German Italian Spanish Simplifi...	None	Minimum: 500 mhz processor 96mb ram 16mb video...	Minimum: Linux Ubuntu 12.04 Dual-core from Int...	Minimum: OS X Snow Leopard 10.6.3 1GB RAM 4GB...
1	20	20	Team Fortress Classic	Team Fortress Classic	Apr 1 1999	1	0	...	English French German Italian Spanish	None	Minimum: 500 mhz processor 96mb ram 16mb video...	Minimum: Linux Ubuntu 12.04 Dual-core from Int...	Minimum: OS X Snow Leopard 10.6.3 1GB RAM 4GB...
2	30	30	Day of Defeat	Day of Defeat	May 1 2003	1	79	...	English French German Italian Spanish	http://www.dayofdefeat.com/	Minimum: 500 mhz processor 96mb ram 16mb video...	Minimum: Linux Ubuntu 12.04 Dual-core from Int...	Minimum: OS X Snow Leopard 10.6.3 1GB RAM 4GB...
3	40	40	Deathmatch Classic	Deathmatch Classic	Jun 1 2001	1	0	...	English French German Italian Spanish	None	Minimum: 500 mhz processor 96mb ram 16mb video...	Minimum: Linux Ubuntu 12.04 Dual-core from Int...	Minimum: OS X Snow Leopard 10.6.3 1GB RAM 4GB...
4	50	50	Half-Life: Opposing Force	Half-Life: Opposing Force	Nov 1 1999	1	0	...	English French German Korean	None	Minimum: 500 mhz processor 96mb ram 16mb video...	Minimum: Linux Ubuntu 12.04 Dual-core from Int...	Minimum: OS X Snow Leopard 10.6.3 1GB RAM 4GB...

Categories

Genres

Requirements

Package Groups